###############################################################################-
# Author:  Pietryka
# Contact: matthew.pietryka@gmail.com
# Purpose: perform the permutations to generate null distributions
# Notes:   This script takes 5-10 minutes to run on a typical 2024 PC
###############################################################################-



# Number of permutation replicates to simulate for each null distribution
# (used by both the university-wide and the within-hall permutations).
n_sims <- 1e4

#  1. Load Packages  =====================

library(dplyr)
library(tidyr)
library(readr)
library(purrr)
library(furrr)


#  2. Load Data =====================

# Input for both permutation schemes below.
room_affil_random_df <- readr::read_rds(
  "data-files/room_affil_random_df.rds"
)


# 3. define permutation functions ==========================

# Shuffle the room assignments among the rows of `df`.
#
# Args:
#   df: data frame with an `anon_address` column.
#
# Returns:
#   `df` with a new `rooms_random` column holding a random permutation of
#   `anon_address`, and with the original `anon_address` column dropped.
randomize_rooms <- function(df){

  df  %>%
    # Permute by index instead of calling sample(anon_address) directly:
    # for a single numeric value x, sample(x) draws from 1:x rather than
    # returning x, which breaks one-row inputs. For every other input this
    # is exactly what sample() does internally, so results under a given
    # seed are unchanged.
    mutate(rooms_random = anon_address[sample.int(length(anon_address))])  %>%
    select(-anon_address)
}

# Build an order-independent identifier for a pair of IDs.
#
# Args:
#   id1, id2: IDs coercible to numeric. May be scalars or vectors of equal
#     length; pairs are formed element-wise.
#
# Returns:
#   Character vector of the form "<smaller>-<larger>", one element per pair,
#   so that (a, b) and (b, a) receive the same ID.
generate_dyad_id <- function(id1, id2){
  id1 <- as.numeric(id1)
  id2 <- as.numeric(id2)
  # pmin()/pmax() compare element-wise; min()/max() would collapse vector
  # inputs into a single pair. Scalar calls give the same result as before.
  id_min <- pmin(id1, id2)
  id_max <- pmax(id1, id2)
  paste(as.character(id_min), as.character(id_max), sep = "-")
}


#  4. Perform Permutations - university wide  =====================

# Keep only the columns the university-wide permutation needs.
ids_df <- select(
  room_affil_random_df,
  anon_id, term_code, anon_address
)


# prepare multi-session system

future::plan(multisession)


# generate randomized room assignments (dyads are built from these below)

# Seed the session RNG before the parallel map; furrr is asked for
# parallel-safe seeding via `seed = TRUE`.
set.seed(7371765)


# One copy of the full roster per simulation; each copy is shuffled
# independently and the results are stacked with `sim_id` recording the
# position of the simulation in the list.
random_unconstrained_df <-
  future_map_dfr(
    rep(list(ids_df), times = n_sims),
    randomize_rooms,
    .id = "sim_id",
    .options = furrr_options(seed = TRUE),
    .progress = TRUE
  )


# Pair up everyone who shares a randomized room within the same simulation
# and term by joining the permuted assignments to themselves.
replicates_unconstrained_df <-
  inner_join(
    x = random_unconstrained_df,
    y = random_unconstrained_df,
    by = c("sim_id", "term_code", "rooms_random"),
    suffix = c("1", "2"),
    relationship = "many-to-many"
  ) %>%
  # the self-join also matches every person with themselves; drop those rows
  filter(anon_id1 != anon_id2) %>%
  # order-independent label, so (a, b) and (b, a) get the same dyad ID
  mutate(dyad_id = map2_chr(anon_id1, anon_id2, generate_dyad_id)) %>%
  # keep the first row of each dyad within a simulation
  # NOTE(review): de-duplication ignores term_code, so a dyad matched in
  # several terms of one simulation is kept once -- confirm this is intended
  group_by(sim_id) %>%
  distinct(dyad_id, .keep_all = TRUE) %>%
  ungroup()


#  5. Perform Permutations - constrained within halls  =====================

# data nested within gender x term x hall x room_n cells

# One row per gender x term x hall x room_n cell; the IDs and addresses that
# belong to the cell are tucked into the `original_affil` list-column.
perm_in_df <-
  select(
    room_affil_random_df,
    gender, term_code, hall_id, room_n, anon_id, anon_address
  ) %>%
  group_by(gender, term_code, hall_id, room_n) %>%
  nest(original_affil = c(anon_id, anon_address)) %>%
  ungroup()



# prepare multi-session system

future::plan(multisession)

# generate randomized room assignments (dyads are built from these below)

# Seed the session RNG before the parallel map; furrr is asked for
# parallel-safe seeding via `seed = TRUE`.
set.seed(774741365)

# One copy of the nested data per simulation. Within each copy, every cell's
# addresses are shuffled separately, so rooms are only exchanged inside a
# cell. The shuffled cells are then flattened back to one row per person.
random_df <-
  future_map_dfr(
    rep(list(perm_in_df), times = n_sims),
    function(cells_df) {
      mutate(cells_df, random_affil = map(original_affil, randomize_rooms))
    },
    .id = "sim_id",
    .options = furrr_options(seed = TRUE),
    .progress = TRUE
  ) %>%
  select(sim_id, term_code, random_affil) %>%
  unnest(cols = random_affil)


# Pair up everyone who shares a randomized room within the same simulation
# and term by joining the within-cell permutations to themselves.
replicates_df <-
  inner_join(
    x = random_df,
    y = random_df,
    by = c("sim_id", "term_code", "rooms_random"),
    suffix = c("1", "2"),
    relationship = "many-to-many"
  ) %>%
  # the self-join also matches every person with themselves; drop those rows
  filter(anon_id1 != anon_id2) %>%
  # order-independent label, so (a, b) and (b, a) get the same dyad ID
  mutate(dyad_id = map2_chr(anon_id1, anon_id2, generate_dyad_id)) %>%
  # keep the first row of each dyad within a simulation
  # NOTE(review): de-duplication ignores term_code, so a dyad matched in
  # several terms of one simulation is kept once -- confirm this is intended
  group_by(sim_id) %>%
  distinct(dyad_id, .keep_all = TRUE) %>%
  ungroup()


# 6. Save =========================================================

# Write both sets of simulated dyads to disk.
write_rds(replicates_unconstrained_df, "data-files/replicates_unconstrained_df.rds")
write_rds(replicates_df, "data-files/replicates_df.rds")

# Audible "finished" signal. beepr is only a convenience, so skip the sound
# instead of ending the script with an error when the package is not
# installed.
if (requireNamespace("beepr", quietly = TRUE)) {
  beepr::beep(sound = 8)
}






